Age was standardized into months and categorized into self-defined bins.
Statistics were calculated to relate each data attribute and adoption outcome type:
Cramér’s V for categorical variables ‘Breed’, ‘Intake_Reason’, ‘Sex’ against each ‘Outcome Type’: Cramer’s V is the strength statistic of the result of chi-squared test of independence. Measures how strongly 2 categorical variables are correlated with each other.
Point Biserial Correlation between ‘Age’ and each ‘Outcome Type’: Measure strength and direction of association between a continuous variable and a dichotomous/binary variable. A special case of Pearson’s product moment correlation (r) as r measures association between 2 continuous variables.
Statistics were normalized from 0-1 and plotted on radar chart.
optipaw = read.csv("/Users/PengOlivia/Desktop/OptiPaw/optipaw_FINAL.csv")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(fmsb)
library(vcd)
## Warning: package 'vcd' was built under R version 4.3.3
## Loading required package: grid
##
## Attaching package: 'vcd'
##
## The following object is masked from 'package:fmsb':
##
## oddsratio
library(ltm)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: msm
## Warning: package 'msm' was built under R version 4.3.3
## Loading required package: polycor
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:ltm':
##
## factor.scores
##
## The following object is masked from 'package:polycor':
##
## polyserial
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(lubridate)
library(tidyr)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:MASS':
##
## select
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
# Clean the dataset by removing rows with NA values
optipaw_clean <- na.omit(optipaw)
print(unique(optipaw_clean$Outcome.Type))
## [1] "Return to Owner" "Transfer" "Adoption" "Euthanasia"
## [5] "Died" "Rto-Adopt" "Disposal" "Missing"
## [9] "Stolen" "Relocate" "Lost" "Foster"
## [13] "Reclaimed" "Escaped" "Released To Wild"
# Ensure consistent case for categorical column 'Animal.Type'
optipaw_clean$Animal.Type <- tolower(optipaw_clean$Animal.Type)
# Group age into meaningful categories (e.g., 0-1 year, 1-5 years, etc.)
optipaw_clean$Age_in_months <- optipaw_clean$Age * 12 # Convert Age to months
age_groups <- cut(optipaw_clean$Age,
breaks = c(0, 12, 60, 120, Inf),
labels = c("0-1 year", "1-5 years", "5-10 years", "10+ years"))
# Remove rows with NA values for Age
optipaw_clean <- optipaw_clean[!is.na(optipaw_clean$Age), ]
# Check if any NA values remain in Age
sum(is.na(optipaw_clean$Age))
## [1] 0
# Define a function to create radar charts for each outcome type
radarchart_outcome_type <- function(optipaw_clean, outcome_type) {
# Convert relevant columns to appropriate data types
optipaw_clean$Animal.Type <- as.factor(optipaw_clean$Animal.Type)
optipaw_clean$Breed <- as.factor(optipaw_clean$Color) # Assuming 'Color' is a proxy for 'Breed'
optipaw_clean$Intake.Type <- as.factor(optipaw_clean$Intake.Type)
optipaw_clean$Sex <- as.factor(optipaw_clean$Sex)
optipaw_clean$Outcome.Type <- as.factor(optipaw_clean$Outcome.Type)
# Calculate Cramér's V for categorical variables
cramers_v_animal_type <- assocstats(table(optipaw_clean$Animal.Type, optipaw_clean$Outcome.Type))$cramer
cramers_v_breed <- assocstats(table(optipaw_clean$Breed, optipaw_clean$Outcome.Type))$cramer
cramers_v_intake_reason <- assocstats(table(optipaw_clean$Intake.Type, optipaw_clean$Outcome.Type))$cramer
cramers_v_sex <- assocstats(table(optipaw_clean$Sex, optipaw_clean$Outcome.Type))$cramer
# Create a binary variable for the given outcome_type vs. other outcomes
binary_outcome_col <- paste0(outcome_type, "_Binary")
optipaw_clean[[binary_outcome_col]] <- ifelse(optipaw_clean$Outcome.Type == outcome_type, 1, 0)
# Calculate Point Biserial Correlation for Age and the selected outcome type
age_correlation <- biserial.cor(optipaw_clean$Age_in_months, optipaw_clean[[binary_outcome_col]])
# Normalize the correlation factors to (0,1)
max_correlation <- max(cramers_v_animal_type, cramers_v_breed, cramers_v_intake_reason, cramers_v_sex, abs(age_correlation))
# Normalize the values
norm_animal_type <- cramers_v_animal_type / max_correlation
norm_breed <- cramers_v_breed / max_correlation
norm_intake_reason <- cramers_v_intake_reason / max_correlation
norm_sex <- cramers_v_sex / max_correlation
norm_age <- abs(age_correlation) / max_correlation
# Prepare data for the radar chart
data <- data.frame(
Animal_Type = c(1, 0),
Breed = c(1, 0),
Age = c(1, 0),
Intake_Reason = c(1, 0),
Sex = c(1, 0)
)
# Add the normalized correlation values for each variable
data <- rbind(data, c(
norm_animal_type,
norm_breed,
norm_age,
norm_intake_reason,
norm_sex
))
# Create radar chart
radarchart(data, axistype = 1,
pcol = rgb(0.2, 0.5, 0.5, 0.9),
pfcol = rgb(0.2, 0.5, 0.5, 0.5),
plwd = 4,
cglcol = "grey",
cglty = 1,
axislabcol = "grey",
caxislabels = seq(0, 1, 0.2),
cglwd = 0.8,
vlcex = 0.8)
# Add title
title(paste(outcome_type))
}
# Set up grid for radar charts
par(mfrow = c(3, 3), mar = c(1, 1, 1, 1))
# Generate radar charts for different outcome types
radar_adoption <- radarchart_outcome_type(optipaw_clean, "Adoption")
radar_euthanasia <- radarchart_outcome_type(optipaw_clean, "Euthanasia")
radar_transfer <- radarchart_outcome_type(optipaw_clean, "Transfer")
radar_died <- radarchart_outcome_type(optipaw_clean, "Died")
radar_missing <- radarchart_outcome_type(optipaw_clean, "Missing")
radar_disposal <- radarchart_outcome_type(optipaw_clean, "Disposal")
radar_lost <- radarchart_outcome_type(optipaw_clean, "Lost")
radar_stolen <- radarchart_outcome_type(optipaw_clean, "Stolen")
radar_relocate <- radarchart_outcome_type(optipaw_clean, "Relocate")
radar_rto <- radarchart_outcome_type(optipaw_clean, "Return To Owner")
## [DATA NOT ENOUGH] at 3
## NaN
## [DATA NOT ENOUGH] at 3
## NaN
## [DATA NOT ENOUGH] at 3
## NaN
## [DATA NOT ENOUGH] at 3
## NaN
## [DATA NOT ENOUGH] at 3
## NaN
radar_rto_adoption <- radarchart_outcome_type(optipaw_clean, "Rto-Adopt") # Pet returned to owner, then adopted
# Reset the plot layout
par(mfrow = c(1, 1))
# Step 1: Count frequency of unique combinations of Intake.Type and Outcome.Type
counts <- optipaw_clean %>%
group_by(Intake.Type, Outcome.Type) %>%
summarise(count = n(), .groups = 'drop')
# Step 2: Calculate total number of entries (sum of all unique combinations)
total_entries <- sum(counts$count) # Sum of all occurrences, not just unique combinations
# Step 3: Convert frequency into percentage (count / total_entries) * 100
counts <- counts %>%
mutate(percentage = (count / total_entries) * 100) # Correct percentage calculation
# Step 4: Pivot the data for the heatmap, using only relevant columns (excluding 'count')
heatmap_data <- counts %>%
select(-count) %>% # Exclude the 'count' column to prevent it from being included in the heatmap
pivot_wider(names_from = Intake.Type, values_from = percentage, values_fill = 0)
# Step 5: Create interactive heatmap using plotly
fig <- plot_ly(
z = as.matrix(heatmap_data[,-1]), # The percentages for each combination (excluding the first column Outcome.Type)
x = colnames(heatmap_data)[-1], # Intake types (from pivot_wider)
y = heatmap_data$Outcome.Type, # Outcome types
type = "heatmap",
colorscale = "YlGnBu", # Use a green-blue color scale
reversescale = TRUE, # Reverse the color scale so larger values are darker
colorbar = list(title = "Percentage (%)") # Title for color legend (now based on percentage)
)
# Step 6: Update layout for better visualization
fig <- fig %>%
layout(
title = "Percentage of Unique Combinations between Intake Type and Outcome Type",
xaxis = list(title = "Intake Type"),
yaxis = list(title = "Outcome Type")
)
# Display the heatmap
fig
# Step 1: Convert the Outcome.Date to a date format and extract the month
optipaw <- optipaw %>%
mutate(Outcome.Date = ymd_hms(Outcome.Date), # Ensure Outcome.Date is parsed as a datetime
month = month(Outcome.Date, label = TRUE)) # Extracts the month as a factor with labels
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Outcome.Date = ymd_hms(Outcome.Date)`.
## Caused by warning:
## ! 8252 failed to parse.
# Step 2: Count frequency of each Outcome.Type per month
monthly_counts <- optipaw %>%
group_by(month, Outcome.Type) %>%
summarise(frequency = n(), .groups = 'drop') %>%
complete(month, Outcome.Type, fill = list(frequency = 0)) # Fill missing combinations with 0
# Step 3: Calculate the total frequency per month
monthly_totals <- monthly_counts %>%
group_by(month) %>%
summarise(total_frequency = sum(frequency))
# Step 4: Merge total frequency with the original data and convert to percentages
monthly_counts <- monthly_counts %>%
left_join(monthly_totals, by = "month") %>%
mutate(percentage = (frequency / total_frequency) * 100) # Convert to percentage
# Step 5: Reorder stacking order of outcome types to include all new values
monthly_counts$Outcome.Type <- factor(monthly_counts$Outcome.Type, levels = c(
'Adoption',
'Rto-Adopt',
'Return to Owner',
'Relocate',
'Transfer',
'Euthanasia',
'Missing',
'Disposal',
'Stolen',
'Lost',
'Died',
'Reclaimed',
'Released To Wild',
'Escaped',
'Foster'
))
# Step 6: Define the colors for the new outcomes
colors <- c('Adoption' = 'darkblue',
'Rto-Adopt' = 'turquoise',
'Return to Owner' = 'green',
'Relocate' = 'lightgreen',
'Transfer' = 'lightblue',
'Euthanasia' = 'yellow',
'Missing' = 'orange',
'Disposal' = 'pink',
'Stolen' = 'red',
'Lost' = 'darkred',
'Died' = 'darkgray',
'Reclaimed' = 'purple',
'Released To Wild' = 'brown',
'Escaped' = 'lightpink',
'Foster' = 'lightyellow')
# Step 7: Create interactive stacked bar plot using plotly, now using percentage
fig <- plot_ly(
data = monthly_counts,
x = ~month,
y = ~percentage, # Use percentage instead of frequency
color = ~Outcome.Type,
type = "bar",
colors = colors # Apply the color scheme for the updated Outcome Types
) %>%
layout(
title = "Percentage of Outcome Types by Month",
xaxis = list(title = "Months"),
yaxis = list(title = "Percentage of Outcome Types", range = c(0, 100)), # Set y-axis range to 0-100%
barmode = 'stack' # Stacks the bars
)
fig
## Warning: Ignoring 15 observations
For every outcome type, make a box plot on the distribution for time length in shelter.
# Convert 'Intake.Date' and 'Outcome.Date' to date format
optipaw$Intake.Date <- ymd_hms(optipaw$Intake.Date)
optipaw$Outcome.Date <- ymd_hms(optipaw$Outcome.Date)
## Warning: 1164 failed to parse.
# Calculate time in shelter in months
optipaw <- optipaw %>%
mutate(Time.In.Shelter = time_length(interval(Intake.Date, Outcome.Date), "months")) %>%
filter(Time.In.Shelter > 0) # Remove any rows with negative or zero months
# Create the box plot using plotly
fig <- plot_ly(optipaw, y = ~Outcome.Type, x = ~Time.In.Shelter, type = "box", boxpoints = FALSE)
fig <- fig %>% layout(title = "Distribution of Time in Shelter (Months) by Outcome Type",
xaxis = list(title = "Length of Time in Shelter (Months)"),
yaxis = list(title = "Outcome Type"))
# Show the plot
fig